Chapter 5 Community composition

load("data/data_27022025.Rdata")

5.1 Taxonomy overview

5.1.1 Stacked barplot

# Merge data frames based on sample
# transplants_metadata <- sample_metadata %>%
#   mutate(Tube_code = str_remove_all(Tube_code, "_a"))
# transplants_metadata$newID <- paste(transplants_metadata$Tube_code,
#                                     "_",
#                                     transplants_metadata$individual)

merged_data <- genome_counts_filt %>%
  mutate_at(vars(-genome),  ~ . / sum(.)) %>% #apply TSS normalisation
  pivot_longer(-genome, names_to = "sample", values_to = "count") %>% #reduce to minimum number of columns
  left_join(., genome_metadata, by = join_by(genome == genome)) %>% #append genome metadata
  left_join(., sample_metadata, by = join_by(sample == Tube_code)) %>% #append sample metadata
  filter(count > 0) #filter 0 counts

ggplot(merged_data, aes(
  x = sample,
  y = count,
  fill = phylum,
  group = phylum
)) + #grouping enables keeping the same sorting of taxonomic units
  geom_bar(stat = "identity",
           colour = "white",
           linewidth = 0.1) + #plot stacked bars with white borders
  scale_fill_manual(values = phylum_colors) +
  facet_nested(. ~ time_point + type , scales = "free") + #facet per day and treatment
  guides(fill = guide_legend(ncol = 1)) +
  labs(fill = "Phylum", y = "Relative abundance", x = "Sample") +
  theme(
    axis.text.x = element_text(
      angle = 45,
      hjust = 1,
      size = 0
    ),
    strip.text.x = element_text(
      size = 14,
      colour = "black",
      face = "bold"
    ),
    strip.background = element_rect(fill ="lightgrey"),
    axis.title = element_text(size = 18, face = "bold"),
    panel.background = element_blank(),
    legend.title = element_text(size = 20, face = "bold"),
    legend.text = element_text(size = 16)
  )

5.1.1.1 Wild samples

merged_data  %>%
  filter(time_point=="Wild")  %>%
  ggplot(aes(x=sample,y=count, fill=phylum, group=phylum)) + #grouping enables keeping the same sorting of taxonomic units
    geom_bar(stat="identity", colour="white", linewidth=0.1) + #plot stacked bars with white borders
    scale_fill_manual(values=phylum_colors) +
    facet_nested(. ~ Population,  scales="free") + #facet per day and treatment
    guides(fill = guide_legend(ncol = 1)) +
    labs(fill="Phylum",y = "Relative abundance",x="Sample")+
  theme(
    axis.text.x = element_text(
      angle = 45,
      hjust = 1,
      size = 0
    ),
    strip.text.x = element_text(
      size = 14,
      colour = "black",
      face = "bold"
    ),
    axis.title = element_text(size = 18, face = "bold"),
    panel.background = element_blank(),
    strip.background = element_rect(fill ="lightgrey"),
    legend.title = element_text(size = 20, face = "bold"),
    legend.text = element_text(size = 16)
  )

5.1.2 Phylum relative abundances

phylum_summary <- genome_counts_filt %>%
  mutate_at(vars(-genome),~./sum(.)) %>% #apply TSS normalisation
  pivot_longer(-genome, names_to = "sample", values_to = "count") %>%
  left_join(sample_metadata, by = join_by(sample == Tube_code)) %>%
  left_join(genome_metadata, by = join_by(genome == genome)) %>%
  group_by(sample,phylum) %>%
  summarise(relabun=sum(count))

5.1.2.1 Cold and wet population

phylum_summary %>%
    left_join(sample_metadata, by = join_by(sample == Tube_code))  %>%
    group_by(phylum, Population) %>%
    filter(Population=="Cold_wet" & time_point=="Wild") %>%
    summarise(total_mean=mean(relabun*100, na.rm=T),
              total_sd=sd(relabun*100, na.rm=T))  %>%
  filter(total_mean!=0) %>% 
    mutate(total=str_c(round(total_mean,2),"±",round(total_sd,2))) %>% 
    arrange(-total_mean) %>% 
    dplyr::select(phylum,total) %>% 
    tt()
======= .table td.tinytable_css_hxoz95aevlkhwp2salnh, .table th.tinytable_css_hxoz95aevlkhwp2salnh { border-top: solid #d3d8dc 0.1em; border-bottom: solid #d3d8dc 0.05em; } .table td.tinytable_css_9sj4nu1760wtc28cpufd, .table th.tinytable_css_9sj4nu1760wtc28cpufd { border-bottom: solid #d3d8dc 0.1em; }
>>>>>>> d08d3ecaca65f0411fd9dfdf7dedf41ce523b6b2
phylum total
p__Bacteroidota 41.93±15.4
p__Bacillota_A 33.96±16.7
p__Bacillota 5.73±4.73
p__Campylobacterota 5.39±4.64
p__Verrucomicrobiota 4.36±3.06
p__Pseudomonadota 3.75±3.76
p__Desulfobacterota 2.31±1.6
p__Bacillota_C 0.85±0.82
p__Cyanobacteriota 0.64±0.83
p__Bacillota_B 0.5±0.77
p__Fusobacteriota 0.37±1.28
p__Spirochaetota 0.12±0.35
p__Actinomycetota 0.09±0.22

5.1.2.2 Hot and dry population

phylum_summary %>%
    left_join(sample_metadata, by = join_by(sample == Tube_code))  %>%
    group_by(phylum, Population) %>%
    filter(Population=="Hot_dry" & time_point=="Wild") %>%
    summarise(total_mean=mean(relabun*100, na.rm=T),
              total_sd=sd(relabun*100, na.rm=T))  %>%
  filter(total_mean!=0) %>% 
    mutate(total=str_c(round(total_mean,2),"±",round(total_sd,2))) %>% 
    arrange(-total_mean) %>% 
    dplyr::select(phylum,total) %>% 
    tt()
======= .table td.tinytable_css_qe2xj4hj7k0issic1cey, .table th.tinytable_css_qe2xj4hj7k0issic1cey { border-bottom: solid #d3d8dc 0.1em; } .table td.tinytable_css_7lcgh7q5aar9mxoeboxz, .table th.tinytable_css_7lcgh7q5aar9mxoeboxz { border-top: solid #d3d8dc 0.1em; border-bottom: solid #d3d8dc 0.05em; }
>>>>>>> d08d3ecaca65f0411fd9dfdf7dedf41ce523b6b2
phylum total
p__Bacillota_A 33.9±16.17
p__Bacteroidota 27.63±17.64
p__Campylobacterota 13.53±20.98
p__Pseudomonadota 10.92±9.94
p__Bacillota 6.09±7.66
p__Spirochaetota 2.66±2.78
p__Desulfobacterota 1.69±1.85
p__Fusobacteriota 1.34±1.73
p__Bacillota_B 0.76±0.73
p__Bacillota_C 0.73±0.5
p__Cyanobacteriota 0.52±0.65
p__Chlamydiota 0.12±0.18
p__Verrucomicrobiota 0.06±0.1
p__Elusimicrobiota 0.05±0.14

5.2 Taxonomy boxplot

5.2.1 Family

family_summary <- genome_counts_filt %>%
  mutate_at(vars(-genome),~./sum(.)) %>% #apply TSS normalisation
  pivot_longer(-genome, names_to = "sample", values_to = "count") %>% #reduce to minimum number of columns
  left_join(sample_metadata, by = join_by(sample == Tube_code)) %>% #append sample metadata
  left_join(., genome_metadata, by = join_by(genome == genome)) %>% #append genome metadata
  group_by(sample,family) %>%
  summarise(relabun=sum(count))
family_arrange <- family_summary %>%
    group_by(family) %>%
    summarise(mean=sum(relabun)) %>%
    arrange(-mean) %>%
    select(family) %>%
    pull()

family_summary %>%
    left_join(genome_metadata %>% select(family,phylum) %>% unique(),by=join_by(family==family)) %>%
    left_join(sample_metadata,by=join_by(sample==Tube_code)) %>%
    filter(family %in% family_arrange[1:20]) %>%
    mutate(family=factor(family,levels=rev(family_arrange[1:20]))) %>%
    filter(relabun > 0) %>%
    ggplot(aes(x=relabun, y=family, group=family, color=phylum)) +
        scale_color_manual(values=phylum_colors[-8]) +
        geom_jitter(alpha=0.5) + 
        facet_grid(.~type)+
        theme_minimal() + 
        labs(y="Family", x="Relative abundance", color="Phylum")
<<<<<<< HEAD

=======

>>>>>>> d08d3ecaca65f0411fd9dfdf7dedf41ce523b6b2

5.2.2 Genus

genus_summary <- genome_counts_filt %>%
  mutate_at(vars(-genome),~./sum(.)) %>% #apply TSS nornalisation
  pivot_longer(-genome, names_to = "sample", values_to = "count") %>% #reduce to minimum number of columns
  left_join(sample_metadata, by = join_by(sample == Tube_code)) %>% #append sample metadata
  left_join(genome_metadata, by = join_by(genome == genome)) %>% #append genome metadata
  group_by(sample,genus) %>%
  summarise(relabun=sum(count)) %>%
  filter(genus != "g__")
genus_arrange <- genus_summary %>%
    group_by(genus) %>%
    summarise(mean=sum(relabun)) %>%
    filter(genus != "g__")%>%
    arrange(-mean) %>%
    select(genus) %>%
    mutate(genus= sub("^g__", "", genus)) %>%
    pull()

genus_summary %>%
    left_join(genome_metadata %>% select(genus,phylum) %>% unique(),by=join_by(genus==genus)) %>%
    left_join(sample_metadata,by=join_by(sample==Tube_code)) %>%
    mutate(genus= sub("^g__", "", genus)) %>%
    filter(genus %in% genus_arrange[1:20]) %>%
    mutate(genus=factor(genus,levels=rev(genus_arrange[1:20]))) %>%
    filter(relabun > 0) %>%
    ggplot(aes(x=relabun, y=genus, group=genus, color=phylum)) +
        scale_color_manual(values=phylum_colors[-c(3,4,6,8)]) +
        geom_jitter(alpha=0.5) + 
        facet_grid(.~type)+
        theme_minimal() + 
        labs(y="Genus", x="Relative abundance", color="Phylum")
<<<<<<< HEAD

=======

>>>>>>> d08d3ecaca65f0411fd9dfdf7dedf41ce523b6b2